{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0409adcc-b61e-44d7-a324-5a45104002a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "606fed19-3117-4d24-806b-f7c5a28808cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>smoking</th>\n",
       "      <th>cancer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>66.2</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>43.9</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>44.7</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>39.3</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>58.7</td>\n",
       "      <td>male</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>27.0</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>67.4</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>42.4</td>\n",
       "      <td>male</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>53.2</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>47.5</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>64.6</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>29.4</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>46.8</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>46.2</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>61.3</td>\n",
       "      <td>male</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>39.0</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>48.3</td>\n",
       "      <td>male</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>41.2</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>50.4</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>55.8</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>39.0</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>61.4</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>59.0</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>55.0</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>59.0</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>43.2</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>48.8</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>40.6</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>47.3</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>55.3</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>43.1</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>46.0</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>43.1</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>41.5</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>43.3</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>49.9</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>38.8</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>52.3</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>66.6</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>57.4</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>48.1</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>41.1</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>42.5</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>66.9</td>\n",
       "      <td>female</td>\n",
       "      <td>yes</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>50.5</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>43.6</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>51.9</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>71.0</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>51.2</td>\n",
       "      <td>female</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>56.2</td>\n",
       "      <td>male</td>\n",
       "      <td>no</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     age  gender smoking  cancer\n",
       "0   66.2  female      no       0\n",
       "1   43.9    male      no       0\n",
       "2   44.7    male      no       1\n",
       "3   39.3  female      no       0\n",
       "4   58.7    male     yes       1\n",
       "5   27.0    male      no       1\n",
       "6   67.4  female      no       0\n",
       "7   42.4    male     yes       1\n",
       "8   53.2  female      no       0\n",
       "9   47.5  female      no       0\n",
       "10  64.6  female      no       0\n",
       "11  29.4  female     yes       1\n",
       "12  46.8  female      no       0\n",
       "13  46.2    male      no       1\n",
       "14  61.3    male     yes       1\n",
       "15  39.0  female      no       1\n",
       "16  48.3    male     yes       1\n",
       "17  41.2  female      no       0\n",
       "18  50.4  female     yes       1\n",
       "19  55.8  female     yes       1\n",
       "20  39.0    male      no       1\n",
       "21  61.4  female      no       0\n",
       "22  59.0    male      no       1\n",
       "23  55.0  female      no       0\n",
       "24  59.0    male      no       1\n",
       "25  43.2    male      no       0\n",
       "26  48.8  female      no       0\n",
       "27  40.6  female     yes       1\n",
       "28  47.3    male      no       0\n",
       "29  55.3  female      no       1\n",
       "30  43.1    male      no       0\n",
       "31  46.0  female     yes       1\n",
       "32  43.1  female      no       1\n",
       "33  41.5    male      no       1\n",
       "34  43.3  female      no       0\n",
       "35  49.9  female     yes       1\n",
       "36  38.8    male      no       1\n",
       "37  52.3  female      no       0\n",
       "38  66.6  female     yes       1\n",
       "39  57.4  female     yes       1\n",
       "40  48.1    male      no       1\n",
       "41  41.1    male      no       0\n",
       "42  42.5  female      no       0\n",
       "43  66.9  female     yes       0\n",
       "44  50.5    male      no       0\n",
       "45  43.6  female      no       0\n",
       "46  51.9  female      no       0\n",
       "47  71.0  female      no       0\n",
       "48  51.2  female      no       0\n",
       "49  56.2    male      no       0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取示例数据\n",
    "data = pd.read_csv(\"smoke.csv\")\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e59cba99-7ae0-4376-96b3-e0e153b6bf28",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>cancer</th>\n",
       "      <th>gender_male</th>\n",
       "      <th>smoking_yes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>66.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>43.9</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>44.7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>39.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>58.7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>27.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>67.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>42.4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>53.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>47.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>64.6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>29.4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>46.8</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>46.2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>61.3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>39.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>48.3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>41.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>50.4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>55.8</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>39.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>61.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>59.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>55.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>59.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>43.2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>48.8</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>40.6</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>47.3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>55.3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>43.1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>46.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>43.1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>41.5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>43.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>49.9</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>38.8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>52.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>66.6</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>57.4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>48.1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>41.1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>42.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>66.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>50.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>43.6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>51.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>71.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>51.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>56.2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     age  cancer  gender_male  smoking_yes\n",
       "0   66.2       0            0            0\n",
       "1   43.9       0            1            0\n",
       "2   44.7       1            1            0\n",
       "3   39.3       0            0            0\n",
       "4   58.7       1            1            1\n",
       "5   27.0       1            1            0\n",
       "6   67.4       0            0            0\n",
       "7   42.4       1            1            1\n",
       "8   53.2       0            0            0\n",
       "9   47.5       0            0            0\n",
       "10  64.6       0            0            0\n",
       "11  29.4       1            0            1\n",
       "12  46.8       0            0            0\n",
       "13  46.2       1            1            0\n",
       "14  61.3       1            1            1\n",
       "15  39.0       1            0            0\n",
       "16  48.3       1            1            1\n",
       "17  41.2       0            0            0\n",
       "18  50.4       1            0            1\n",
       "19  55.8       1            0            1\n",
       "20  39.0       1            1            0\n",
       "21  61.4       0            0            0\n",
       "22  59.0       1            1            0\n",
       "23  55.0       0            0            0\n",
       "24  59.0       1            1            0\n",
       "25  43.2       0            1            0\n",
       "26  48.8       0            0            0\n",
       "27  40.6       1            0            1\n",
       "28  47.3       0            1            0\n",
       "29  55.3       1            0            0\n",
       "30  43.1       0            1            0\n",
       "31  46.0       1            0            1\n",
       "32  43.1       1            0            0\n",
       "33  41.5       1            1            0\n",
       "34  43.3       0            0            0\n",
       "35  49.9       1            0            1\n",
       "36  38.8       1            1            0\n",
       "37  52.3       0            0            0\n",
       "38  66.6       1            0            1\n",
       "39  57.4       1            0            1\n",
       "40  48.1       1            1            0\n",
       "41  41.1       0            1            0\n",
       "42  42.5       0            0            0\n",
       "43  66.9       0            0            1\n",
       "44  50.5       0            1            0\n",
       "45  43.6       0            0            0\n",
       "46  51.9       0            0            0\n",
       "47  71.0       0            0            0\n",
       "48  51.2       0            0            0\n",
       "49  56.2       0            1            0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 为data的“gender”和“smoking”分类变量创建虚拟变量\n",
    "logistics_data = pd.get_dummies(data, \n",
    "                                columns=[\"gender\", \"smoking\"], \n",
    "                                dtype=int, \n",
    "                                drop_first=True)\n",
    "logistics_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8b751af8-462e-456f-9e57-b895e4898119",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>cancer</th>\n",
       "      <th>gender_male</th>\n",
       "      <th>smoking_yes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>age</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cancer</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gender_male</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>smoking_yes</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               age  cancer  gender_male  smoking_yes\n",
       "age           True   False        False        False\n",
       "cancer       False    True        False        False\n",
       "gender_male  False   False         True        False\n",
       "smoking_yes  False   False        False         True"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看自变量之间所有相关系数是否有大于0.8的\n",
    "logistics_data.corr().abs() > 0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "2403ed8d-1801-4353-ab3e-e732fc14fc4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 创建构建逻辑回归模型所需的因变量和自变量\n",
    "y = logistics_data['cancer']\n",
    "x = logistics_data.drop('cancer', axis=1)\n",
    "# 在自变量里添加一个常量（为了引入截距）\n",
    "x = sm.add_constant(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "cd8660f2-a25a-4f92-854d-d6527ef4cc79",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.426845\n",
      "         Iterations 7\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>        <td>cancer</td>      <th>  No. Observations:  </th>  <td>    50</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                 <td>Logit</td>      <th>  Df Residuals:      </th>  <td>    46</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>                 <td>MLE</td>       <th>  Df Model:          </th>  <td>     3</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>            <td>Thu, 16 Nov 2023</td> <th>  Pseudo R-squ.:     </th>  <td>0.3835</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                <td>20:00:44</td>     <th>  Log-Likelihood:    </th> <td> -21.342</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>             <td>True</td>       <th>  LL-Null:           </th> <td> -34.617</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>     <td>nonrobust</td>    <th>  LLR p-value:       </th> <td>7.314e-06</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "       <td></td>          <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>       <td>    2.0716</td> <td>    2.489</td> <td>    0.832</td> <td> 0.405</td> <td>   -2.806</td> <td>    6.949</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>age</th>         <td>   -0.0789</td> <td>    0.051</td> <td>   -1.552</td> <td> 0.121</td> <td>   -0.179</td> <td>    0.021</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>gender_male</th> <td>    1.8148</td> <td>    0.817</td> <td>    2.222</td> <td> 0.026</td> <td>    0.214</td> <td>    3.415</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>smoking_yes</th> <td>    4.4339</td> <td>    1.346</td> <td>    3.294</td> <td> 0.001</td> <td>    1.796</td> <td>    7.072</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/latex": [
       "\\begin{center}\n",
       "\\begin{tabular}{lclc}\n",
       "\\toprule\n",
       "\\textbf{Dep. Variable:}   &      cancer      & \\textbf{  No. Observations:  } &       50    \\\\\n",
       "\\textbf{Model:}           &      Logit       & \\textbf{  Df Residuals:      } &       46    \\\\\n",
       "\\textbf{Method:}          &       MLE        & \\textbf{  Df Model:          } &        3    \\\\\n",
       "\\textbf{Date:}            & Thu, 16 Nov 2023 & \\textbf{  Pseudo R-squ.:     } &   0.3835    \\\\\n",
       "\\textbf{Time:}            &     20:00:44     & \\textbf{  Log-Likelihood:    } &   -21.342   \\\\\n",
       "\\textbf{converged:}       &       True       & \\textbf{  LL-Null:           } &   -34.617   \\\\\n",
       "\\textbf{Covariance Type:} &    nonrobust     & \\textbf{  LLR p-value:       } & 7.314e-06   \\\\\n",
       "\\bottomrule\n",
       "\\end{tabular}\n",
       "\\begin{tabular}{lcccccc}\n",
       "                      & \\textbf{coef} & \\textbf{std err} & \\textbf{z} & \\textbf{P$> |$z$|$} & \\textbf{[0.025} & \\textbf{0.975]}  \\\\\n",
       "\\midrule\n",
       "\\textbf{const}        &       2.0716  &        2.489     &     0.832  &         0.405        &       -2.806    &        6.949     \\\\\n",
       "\\textbf{age}          &      -0.0789  &        0.051     &    -1.552  &         0.121        &       -0.179    &        0.021     \\\\\n",
       "\\textbf{gender\\_male} &       1.8148  &        0.817     &     2.222  &         0.026        &        0.214    &        3.415     \\\\\n",
       "\\textbf{smoking\\_yes} &       4.4339  &        1.346     &     3.294  &         0.001        &        1.796    &        7.072     \\\\\n",
       "\\bottomrule\n",
       "\\end{tabular}\n",
       "%\\caption{Logit Regression Results}\n",
       "\\end{center}"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:                 cancer   No. Observations:                   50\n",
       "Model:                          Logit   Df Residuals:                       46\n",
       "Method:                           MLE   Df Model:                            3\n",
       "Date:                Thu, 16 Nov 2023   Pseudo R-squ.:                  0.3835\n",
       "Time:                        20:00:44   Log-Likelihood:                -21.342\n",
       "converged:                       True   LL-Null:                       -34.617\n",
       "Covariance Type:            nonrobust   LLR p-value:                 7.314e-06\n",
       "===============================================================================\n",
       "                  coef    std err          z      P>|z|      [0.025      0.975]\n",
       "-------------------------------------------------------------------------------\n",
       "const           2.0716      2.489      0.832      0.405      -2.806       6.949\n",
       "age            -0.0789      0.051     -1.552      0.121      -0.179       0.021\n",
       "gender_male     1.8148      0.817      2.222      0.026       0.214       3.415\n",
       "smoking_yes     4.4339      1.346      3.294      0.001       1.796       7.072\n",
       "===============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构建逻辑回归模型，并进行数据拟合\n",
    "result = sm.Logit(y, x).fit()\n",
    "# 输出拟合结果\n",
    "result.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0a2e0da1-c284-47df-8168-56d0ab50cb00",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6.139848085022786\n",
      "84.25938856524215\n"
     ]
    }
   ],
   "source": [
    "# 计算系数的含义\n",
    "print(np.exp(1.8148))\n",
    "print(np.exp(4.4339))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53eaafe7-fbe8-4e37-9d7c-73960359d050",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
